<?php

// Raise the execution time limit to 6000 seconds; the crawl sleeps between requests and runs long.
ini_set('max_execution_time', 6000);
include_once dirname(dirname(dirname(__FILE__))) . '/include/common.php';
include_once 'simple_html_dom.php';
// NOTE(review): core is presumably declared by include/common.php; it exposes the ->db handle used below.
$core = new core();
// Parser instance used by the crawl loop further down to load each fetched page.
$html = new simple_html_dom();
// Alternative that lets the parser fetch the page itself (disabled):
//$html->load_file("http://www.zara.com/es/en/");
$url = "http://www.zara.com/us/";
// Start timestamp; the crawl loop reports elapsed time relative to it.
$begin = time();

// getCategory() has no return statement, so $result_get_category is always null.
$result_get_category = getCategory($url);

/**
 * Prepare the zen_crawl_category table for a crawl run.
 *
 * When the table is empty, the page at $url is downloaded, the table is
 * cleared and every link found under "#mainNavigationMenu li" is stored as a
 * level-1 row (is_crawled = 0, parent_id = 0). One HTML table row is echoed
 * per stored link. When the table already holds rows, pre_continue_crawl()
 * is called instead so an earlier run can be resumed.
 *
 * The script is terminated (die) when the download reports an error or the
 * navigation menu cannot be found.
 *
 * @param string $url address of the page whose main navigation seeds the crawl
 * @return void
 */
function getCategory($url){
    if (get_total_url_nav() != 0) {
        pre_continue_crawl();
        return;
    }

    // Function scope cannot see the file-level $core / $html objects, so
    // local instances are created here, as the other helpers in this file do.
    $core = new core();
    $html = new simple_html_dom();

    $data_html = get_content_html($url);
    if ($data_html['ERR'] != '') {
        echo $data_html['ERR'];
        die;
    }
    $html->load($data_html['EXE']);
    $core->db->query("DELETE FROM zen_crawl_category");
    $level = 1;
    echo '<table><tr><th>level</th><th>url</th><th>menu</th><th>error</th></tr>';

    $menu_items = $html->find('#mainNavigationMenu li');
    if (!$menu_items) {
        echo "Khong tim thay menu </br>";
        die;
    }

    foreach ($menu_items as $li) {
        $tag_a = $li->find('a', 0);
        if (!$tag_a) {
            // Menu entry without a link: report it and move on.
            echo '<tr><td></td><td></td><td></td><td>Khong tim thay tag li> a</td></tr>';
            continue;
        }
        $src = $tag_a->href;
        $nav_name = trim($tag_a->plaintext);
        // Scraped text is untrusted and may contain quotes (e.g. "Women's"),
        // which would otherwise break the statement below.
        // NOTE(review): prefer the db layer's own escaping/prepared statements if it offers them.
        $src_sql = addslashes($src);
        $nav_name_sql = addslashes($nav_name);
        $core->db->query("INSERT INTO zen_crawl_category (url,nav_name,level,is_crawled,parent_id)
                        VALUES('$src_sql','$nav_name_sql',$level,0,0)");
        echo '<tr><td>' . $level . '</td><td>' . $src . '</td><td>' . $nav_name . '</td><td></td></tr>';
    }
}

// Main crawl loop: take the lowest-id row that is not crawled yet, download
// its page, and store the links nested under that page's own menu entry as
// child rows one level deeper. Ends when get_url_nav() returns nothing.
while ($nav = get_url_nav()) {
    $level = $nav->level + 1;
    // The id is interpolated into SQL below; the cast keeps it strictly numeric.
    $nav_id = (int) $nav->id;

    echo '<tr colspan="4"><td>Crawl child menu ' . $level . ': ' . $nav->url . '</td></tr>';
    $time_running = time() - $begin;
    echo '<tr colspan="4"><td>time running = ' . $time_running . ', minute=' . ($time_running / 60) . '</td></tr>';

    $data_html = get_content_html($nav->url);
    if ($data_html['ERR'] != '') {
        echo $data_html['ERR'];
        die;
    }
    $html->load($data_html['EXE']);

    // Re-download the page (with a 2 second pause) while the navigation menu is missing.
    $repeat = 0;
    while (!$html->find('#mainNavigationMenu', 0)) {
        $repeat++;
        if ($repeat > 2) { // some pages have no menu at all --> mark as crawled and drop them
            $core->db->query("UPDATE zen_crawl_category SET is_crawled=1 WHERE id=$nav_id");
            break;
        }
        echo '<tr><td></td><td></td><td></td><td>Khong tim thay menu </td></tr>';
        sleep(2);
        $data_html = get_content_html($nav->url);
        if ($data_html['ERR'] != '') {
            echo $data_html['ERR'];
            die;
        }
        $html->load($data_html['EXE']);
    }
    if ($repeat > 2) {
        continue;
    }
    $core->db->query("UPDATE zen_crawl_category SET is_crawled=1 WHERE id=$nav_id");

    // Locate this page's own link inside the menu; its children live in the
    // element that directly follows that link.
    $menu = $html->find('#mainNavigationMenu', 0);
    $current_nav_tag_a = $menu->find('a[href=' . $nav->url . ']', 0);
    if (!$current_nav_tag_a) {
        echo '<tr><td></td><td></td><td></td><td>Khong tim thay ' . $nav->url . '</td></tr>';
        continue;
    }

    $sibling_tag_ul = $current_nav_tag_a->next_sibling();
    if (!$sibling_tag_ul) {
        echo '<tr><td></td><td></td><td></td><td>Khong tim thay  ul: ' . $nav->url . ' </td></tr>';
        continue;
    }

    $child_items = $sibling_tag_ul->find('li');
    if (!$child_items) {
        echo '<tr><td></td><td></td><td></td><td>Khong tim thay tag ul > li</td></tr>';
        continue;
    }

    foreach ($child_items as $li) {
        $tag_a = $li->find('a', 0);
        if (!$tag_a) {
            continue;
        }
        $src = $tag_a->href;
        $nav_name = trim($tag_a->plaintext);
        // Scraped text is untrusted and may contain quotes; escape it so the
        // INSERT stays well-formed.
        // NOTE(review): prefer the db layer's own escaping/prepared statements if it offers them.
        $src_sql = addslashes($src);
        $nav_name_sql = addslashes($nav_name);
        $core->db->query("INSERT INTO zen_crawl_category (url,nav_name,level,is_crawled,parent_id)
                        VALUES('$src_sql','$nav_name_sql',$level,0," . $nav_id . ")");
        echo '<tr><td>' . $level . '</td><td>' . $src . '</td><td>' . $nav_name . '</td><td></td></tr>';
    }

    // Pause between pages that yielded children, to avoid hammering the site.
    sleep(2);
}

echo '</table>';
die;

/**
 * Download a page with cURL, following redirects and sending a desktop
 * Firefox user agent.
 *
 * @param string $url address to request
 * @return array 'EXE' => value returned by curl_exec(),
 *               'INF' => value returned by curl_getinfo(),
 *               'ERR' => value returned by curl_error() (callers compare it against '')
 */
function get_content_html($url)
{
    $handle = curl_init();

    // Options are applied one by one, in this order.
    $settings = array(
        CURLOPT_URL => $url,
        CURLOPT_CONNECTTIMEOUT => 300,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_HEADER => false,
        CURLOPT_COOKIESESSION => TRUE,
        CURLOPT_FOLLOWLOCATION => TRUE,
        CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.8) Gecko/2009032609 Firefox/3.0.8',
    );
    foreach ($settings as $option => $value) {
        curl_setopt($handle, $option, $value);
    }

    // Collect everything before the handle is closed.
    $body = curl_exec($handle);
    $info = curl_getinfo($handle);
    $error = curl_error($handle);
    curl_close($handle);

    return array('EXE' => $body, 'INF' => $info, 'ERR' => $error);
}

/**
 * Fetch the next navigation row that still has to be crawled.
 *
 * @return mixed result of db->get_row() for the lowest-id row with is_crawled = 0;
 *               the crawl loop stops when this value is falsy
 */
function get_url_nav()
{
    $app = new core();

    return $app->db->get_row(
        "SELECT * FROM zen_crawl_category WHERE is_crawled =0 ORDER BY id ASC LIMIT 1 "
    );
}

/**
 * Count all rows currently stored in zen_crawl_category.
 *
 * @return mixed the "total" column of the COUNT(id) query result
 */
function get_total_url_nav()
{
    $app = new core();
    $count_row = $app->db->get_row("SELECT COUNT(id) as total FROM zen_crawl_category");

    return $count_row->total;
}

/**
 * Rewind the most recently crawled row so an interrupted run can resume.
 *
 * The highest-id row with is_crawled = 1 is flagged as not crawled again and
 * the rows recorded as its children (parent_id = that id) are deleted, so the
 * crawl loop processes that page from scratch.
 *
 * @return bool always true
 */
function pre_continue_crawl()
{
    $core = new core();
    $query = "SELECT * FROM zen_crawl_category WHERE is_crawled =1 ORDER BY id DESC LIMIT 1 ";
    $list_nav = $core->db->get_row($query);

    // No page has been crawled yet: there is nothing to rewind, and building
    // the statements below without an id would produce malformed SQL.
    if (!$list_nav) {
        return true;
    }

    // The id is interpolated into SQL; the cast keeps it strictly numeric.
    $last_id = (int) $list_nav->id;
    $core->db->query("UPDATE zen_crawl_category SET is_crawled=0 WHERE id=$last_id");
    $core->db->query("DELETE FROM zen_crawl_category WHERE parent_id = $last_id ");
    return true;
}

?>
